%matplotlib inline
import pandas as pd
import geopandas as gpd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import matplotlib
import json
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
# Default Matplotlib font settings, applied globally through matplotlib.rc
# so that every pyplot figure created afterwards picks them up.
font = {'family' : 'Linux Biolinum',
'weight' : 'bold',
'size' : 22}
matplotlib.rc('font', **font)
# Directory prefix of the GeoJSON files; "l30-county/" is appended to it
# when the county shapes are loaded.
path_to_geojson = "geo-data-hungary/GeoJSON/"
# NOTE(review): presumably the population of Hungary -- confirm source/date.
population = 9769012
# Evaluates to 1e-5 * population * 22.8 * 0.5.
# NOTE(review): looks like 22.8 ventilators per 100,000 inhabitants with
# half of them counted as available -- confirm both constants.
ventilators = 1e-5 * population * 22.8 * 0.5
# Divides `ventilators` in the shortage estimate of the active-case model.
# NOTE(review): presumably the share of active cases that needs a
# ventilator -- confirm.
ratio_of_ventilator_use = 0.10
# National time series; the code below reads the columns "Date", "Active",
# "Deaths" and "Recovered".
df = pd.read_csv("dataset/koronavirus.gov.hu.csv")

# Day-over-day change of the total (active + deaths + recovered).
# The first row has no predecessor, so its value is NaN.
df["New"] = (df["Active"] + df["Deaths"] + df["Recovered"]).diff()

# Ratio of each row's new cases to the previous row's new cases.
# A previous value of exactly 0 is masked to NaN so the ratio becomes NaN
# instead of +/-inf. This is the vectorized form of the former per-row loop,
# which relied on `np.NaN` (an alias removed in NumPy 2.0).
previous_new = df["New"].shift(1)
df["DailyRate"] = df["New"] / previous_new.where(previous_new != 0)

# Notebook display of the resulting table.
df
def exponentialFunction(x, beta, A):
    """Evaluate the exponential model ``A * beta**x``.

    x    -- scalar or array-like of exponents
    beta -- base of the exponential (growth factor per unit of x)
    A    -- multiplicative amplitude (value of the model at x == 0)

    Returns the model value(s), broadcast by NumPy over ``x``.
    """
    growth = np.power(beta, x)
    return A * growth
def activeCasesPlot(ylog=False):
    """Show the "Active" column of the module-level ``df`` over "Date".

    The title also reports the value of the last row.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    active_trace = go.Scatter(
        x=df["Date"],
        y=df["Active"],
        mode='lines+markers',
        name='Active cases',
        line_shape="spline",
        line={"width": 2, "color": "dodgerblue"},
        marker={"size": 10},
    )

    layout_options = {
        "title": "Active cases in Hungary, total active cases today: %d" % df["Active"].iloc[-1],
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of active cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"

    fig = go.Figure()
    fig.add_trace(active_trace)
    fig.update_layout(**layout_options)
    fig.show()


activeCasesPlot()
def dailyNewCasesPlot(ylog=False):
    """Show the "New" column of the module-level ``df`` over "Date".

    The title also reports the value of the last row.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    new_cases_trace = go.Scatter(
        x=df["Date"],
        y=df["New"],
        mode='lines+markers',
        name='Daily new cases',
        line_shape="spline",
        line={"width": 2, "color": "dodgerblue"},
        marker={"size": 10},
    )

    layout_options = {
        "title": "Daily new cases in Hungary, today: %d" % df["New"].iloc[-1],
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of daily new cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"

    fig = go.Figure()
    fig.add_trace(new_cases_trace)
    fig.update_layout(**layout_options)
    fig.show()


dailyNewCasesPlot()
def dailyNewCasesRatioPlot(ylog=False):
    """Show the "DailyRate" column of the module-level ``df`` over "Date".

    The title also reports the mean of the column as computed by
    ``Series.mean()``.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    ratio_trace = go.Scatter(
        x=df["Date"],
        y=df["DailyRate"],
        mode='lines+markers',
        name='Ratio of daily new cases compared to previous day',
        line_shape="spline",
        line={"width": 2, "color": "dodgerblue"},
        marker={"size": 10},
    )

    layout_options = {
        "title": "Ratio of daily new cases compared to previous day in Hungary, average: %.2f" % df["DailyRate"].mean(),
        "xaxis_title": "Date of observation",
        "yaxis_title": "Ratio of daily new cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"

    fig = go.Figure()
    fig.add_trace(ratio_trace)
    fig.update_layout(**layout_options)
    fig.show()


dailyNewCasesRatioPlot()
# Row positions in df at which each fitted segment of the death curve starts.
dividers = [16,35,41]
popts = []
pcovs = []
# Each segment runs from its divider up to (but excluding) the next one;
# a stop of None leaves the last segment open-ended, i.e. it runs to the
# final row of df.
segment_stops = dividers[1:] + [None]
for segment_start, segment_stop in zip(dividers, segment_stops):
    popt, pcov = curve_fit(
        exponentialFunction,
        df.index[segment_start:segment_stop],
        df.Deaths.iloc[segment_start:segment_stop],
    )
    popts.append(popt)
    pcovs.append(pcov)
def deathCasesPlot(ylog=False):
    """Show deaths (cumulative line, day-over-day bars) with the fitted curves.

    Reads the module-level ``df``, ``dividers`` and ``popts`` at call time.
    One exponential curve is drawn per entry of ``dividers``, using the fit
    parameters at the same position in ``popts``. The curve of the last
    segment is drawn solid, the others dashed.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    # Each fitted curve is evaluated for row positions start .. day_limit - 1.
    day_limit = 65
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Deaths"].diff(),
        name="Daily death cases",
        marker={"color": "tomato"},
    ))
    fig.add_trace(go.Scatter(
        x=df["Date"],
        y=df["Deaths"],
        mode='lines+markers',
        name='Death cases',
        line_shape="spline",
        line={"width": 2, "color": 'firebrick'},
        marker={"size": 10},
    ))

    # Mark the rows where a new segment starts and label them with its beta.
    beta_labels = ["%.2f" % fit[0] for fit in popts]
    fig.add_trace(go.Scatter(
        x=df["Date"].iloc[dividers],
        y=df["Deaths"].iloc[dividers],
        text=beta_labels,
        textposition="top center",
        mode="markers+text",
        name=r"New $\beta$ value",
        marker={"size": 15, "color": "rgba(0, 0, 0, 0.4)"},
    ))

    last_segment = len(dividers) - 1
    for segment, start in enumerate(dividers):
        if segment == last_segment:
            line_style = {"width": 2}
        else:
            line_style = {"width": 1, "dash": "dash"}

        first_day = datetime.datetime.strptime(df["Date"].iloc[start], "%Y-%m-%d")
        curve_dates = [first_day + datetime.timedelta(days=offset)
                       for offset in range(day_limit - start)]
        curve_positions = range(start, start + len(curve_dates))
        fig.add_trace(go.Scatter(
            x=curve_dates,
            y=exponentialFunction(curve_positions, *popts[segment]),
            mode='lines',
            name=r'Fitted exponential $\beta=%.2f$' % popts[segment][0],
            line_shape="spline",
            line=line_style,
            marker={"size": 10},
        ))

    layout_options = {
        "title": "Death cases in Hungary, total: %d" % df["Deaths"].iloc[-1],
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of death cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"
    fig.update_layout(**layout_options)
    fig.show()


deathCasesPlot(True)
# Fit a single exponential A * beta**x to the "Recovered" column over all rows.
# NOTE(review): no code visible in this file reads popt/pcov before they are
# reassigned by a later fit -- confirm whether this fit is still needed.
popt, pcov = curve_fit(exponentialFunction, df.index, df.Recovered)
def recoveredCasesPlot(ylog=False):
    """Show the "Recovered" column of ``df`` as a line plus day-over-day bars.

    Reads the module-level ``df``. The title reports the last row's value.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    daily_bars = go.Bar(
        x=df["Date"],
        y=df["Recovered"].diff(),
        name="Daily recovered cases",
        marker={"color": "chartreuse"},
    )
    total_line = go.Scatter(
        x=df["Date"],
        y=df["Recovered"],
        mode='lines+markers',
        name='Recovered cases',
        line_shape="spline",
        line={"width": 2, "color": 'seagreen'},
        marker={"size": 10},
    )

    layout_options = {
        "title": "Recovered cases in Hungary, total: %d" % df["Recovered"].iloc[-1],
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of recovered cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"

    fig = go.Figure()
    for trace in (daily_bars, total_line):
        fig.add_trace(trace)
    fig.update_layout(**layout_options)
    fig.show()


recoveredCasesPlot(True)
def combinedCasesPlot(ylog=False):
    """Show "Deaths", "Recovered" and "Active" of ``df`` as stacked bars.

    Reads the module-level ``df``; the bars are stacked per date in the
    order deaths, recovered, active.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    # (column, legend name, bar colour) in stacking order.
    series = (
        ("Deaths", 'Death cases', "tomato"),
        ("Recovered", "Recovered cases", "chartreuse"),
        ("Active", 'Active cases', "dodgerblue"),
    )

    fig = go.Figure()
    for column, label, colour in series:
        fig.add_trace(go.Bar(
            x=df["Date"],
            y=df[column],
            name=label,
            marker={"color": colour},
        ))

    layout_options = {
        "title": "Current situation in Hungary",
        "barmode": "stack",
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"
    fig.update_layout(**layout_options)
    fig.show()


combinedCasesPlot()
def dailyCombinedCasesPlot(ylog=False):
    """Show day-over-day deaths, recoveries and new cases as stacked bars.

    Reads the module-level ``df``: the deaths and recovered bars are the
    ``diff()`` of their columns, the new-cases bars are the "New" column.

    ylog -- when true, the y axis uses a logarithmic scale.
    """
    # (values, legend name, bar colour) in stacking order.
    series = (
        (df["Deaths"].diff(), 'Daily death cases', "tomato"),
        (df["Recovered"].diff(), "Daily recovered cases", "chartreuse"),
        (df["New"], 'Daily new cases', "dodgerblue"),
    )

    fig = go.Figure()
    for values, label, colour in series:
        fig.add_trace(go.Bar(
            x=df["Date"],
            y=values,
            name=label,
            marker={"color": colour},
        ))

    layout_options = {
        "title": "Current situation in Hungary",
        "barmode": "stack",
        "xaxis_title": "Date of observation",
        "yaxis_title": "Number of daily new cases",
        "font": {"family": "Linux Biolinum", "size": 18},
    }
    if ylog:
        layout_options["yaxis_type"] = "log"
    fig.update_layout(**layout_options)
    fig.show()


dailyCombinedCasesPlot()
# Deaths plotted against the total number of registered cases
# (active + deaths + recovered) instead of against time.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df["Active"] + df["Deaths"] + df["Recovered"],
    y=df["Deaths"],
    mode='lines+markers',
    # The trace shows deaths; it was previously labelled 'Recovered cases',
    # a leftover from the recovered-cases plot.
    name='Deaths',
    line_shape="spline",
    line=dict(
        width=2,
        color='orange',
    ),
    marker=dict(size=10),
))
fig.update_layout(
    title="Deaths as a function of registered cases",
    xaxis_title="Number of registered cases",
    yaxis_title="Number of deaths",
    font=dict(
        family="Linux Biolinum",
        size=18,
    ),
    #yaxis_type="log"
)
fig.show()
# Discrete slope of the deaths-vs-registered-cases curve: the day-over-day
# change in deaths divided by the day-over-day change in registered cases
# (active + deaths + recovered). Rows where the registered total did not
# change divide by zero and therefore yield inf/NaN rather than a number.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df["Active"] + df["Deaths"] + df["Recovered"],
    y=df["Deaths"].diff() / (df["Active"] + df["Deaths"] + df["Recovered"]).diff(),
    mode='lines+markers',
    # The trace shows the slope; it was previously labelled
    # 'Recovered cases', a leftover from the recovered-cases plot.
    name='Daily deaths per new registered case',
    line=dict(
        width=2,
        color='orange',
    ),
    marker=dict(size=10),
))
fig.update_layout(
    title="Slope of the deaths as a function of registered cases curve",
    xaxis_title="Number of registered cases",
    yaxis_title="Slope",
    font=dict(
        family="Linux Biolinum",
        size=18,
    ),
    #yaxis_type="log"
)
fig.show()
# Piecewise exponential fit of the "Active" column.
# Segment start positions, by row index of df, selected manually.
# (A stray numeric literal that preceded this comment was a no-op
# expression statement and has been removed.)
divider_points = [0, 5, 15, 25, 36, 39]
# The model is extrapolated up to this row position, counted in days from
# the first row of df (2020-03-04 according to the original note).
# NOTE(review): if df ever holds more than day_limit rows, extra_range below
# is empty and the stacked model values outnumber date_list -- confirm how
# the plot should behave in that case.
day_limit = 80
popts = []
pcovs = []
ranges = []
evaluated_fits = []
# Every segment ends where the next one starts; a stop of None leaves the
# last segment open-ended so it runs to the final row of df.
segment_stops = divider_points[1:] + [None]
for segment_start, segment_stop in zip(divider_points, segment_stops):
    ranges.append(df.index[segment_start:segment_stop])
    popt, pcov = curve_fit(exponentialFunction,
                           ranges[-1],
                           df["Active"].iloc[segment_start:segment_stop])
    popts.append(popt)
    pcovs.append(pcov)
    # Model values on the rows the segment was fitted to.
    evaluated_fits.append(exponentialFunction(ranges[-1], *popt))
# Continue the last segment's fit beyond the data up to day_limit.
extra_range = range(df.index[-1]+1, day_limit)
extra_y = exponentialFunction(extra_range, *popts[-1])
# Calendar dates for row positions 0 .. day_limit - 1, starting at the
# first date of df.
base = df["Date"].iloc[0]
date_list = [datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=x) for x in range(day_limit)]
# Active cases: data points, the stitched piecewise model, the start of each
# segment labelled with its beta, and each earlier segment's fit extrapolated
# as a dashed line.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=date_list,
    # Fitted values of all segments followed by the extrapolated tail.
    y=np.hstack((
        np.hstack(evaluated_fits),
        extra_y
    )),
    name="Model",
    line=dict(
        width=2,
    )
))
fig.add_trace(go.Scatter(
    x=df["Date"],
    y=df["Active"],
    mode='markers',
    name='Data',
    marker=dict(
        size=10,
    )
))
betas = ["%.2f" % fit[0] for fit in popts]
fig.add_trace(go.Scatter(
    x=df["Date"].iloc[divider_points],
    y=df["Active"].iloc[divider_points],
    text=betas,
    textposition="top center",
    mode="markers+text",
    name=r"New $\beta$ value",
    marker=dict(
        size=15,
        color="rgba(0, 0, 0, 0.4)"
    )
))
# Dashed continuation of every segment except the last one, from the
# segment's start up to day_limit.
for j, fit in enumerate(popts[:-1]):
    fig.add_trace(go.Scatter(
        x=date_list[divider_points[j]:],
        y=exponentialFunction(range(divider_points[j], day_limit), *fit),
        name=r"%.2f" % fit[0],
        line=dict(
            width=1,
            dash="dash"
        )
    ))
# Calculate ventilator capacity: solve
#     A * beta**d == ventilators / ratio_of_ventilator_use
# for the day offset d, using the parameters of the last fitted segment.
# The equation only describes a future crossing while the fit is growing
# (beta > 1) with a positive amplitude. Otherwise the formula yields a date
# in the past, or inf/NaN that cannot be turned into a timedelta, so no date
# is reported in that case.
latest_beta, latest_amplitude = popts[-1]
out_of_stock = None
day = None
if latest_beta > 1 and latest_amplitude > 0:
    out_of_stock = np.floor(np.log(ventilators/latest_amplitude/ratio_of_ventilator_use)/np.log(latest_beta))
    try:
        day = datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=float(out_of_stock))
    except OverflowError:
        # A beta barely above 1 pushes the crossing beyond what
        # timedelta/datetime can represent.
        day = None
shortage_label = day.strftime("%Y-%m-%d") if day is not None else "none projected"
fig.update_layout(
    title="Active cases in Hungary, expected ventilator shortage: %s" % shortage_label,
    xaxis_title="Date",
    yaxis_title="Number of cases",
    font=dict(
        family="Linux Biolinum",
        size=18,
    ),
    yaxis_type="log"
)
fig.show()
# load geodata
# Column labels of the latest and the previous day in the county table.
# They are derived by adding the last / second-to-last row position of df to
# its first date.
# NOTE(review): this assumes df holds exactly one row per calendar day --
# confirm.
base = df["Date"].iloc[0]
first_day = datetime.datetime.strptime(base, "%Y-%m-%d")
today = (first_day + datetime.timedelta(days=df.index[-1])).strftime("%Y-%m-%d")
yesterday = (first_day + datetime.timedelta(days=df.index[-2])).strftime("%Y-%m-%d")

# Read every file of the county directory and stack them into one frame.
county_dir = path_to_geojson + "l30-county/"
geodf = None
for filename in os.listdir(county_dir):
    county_shape = gpd.read_file(county_dir + filename)
    if geodf is None:
        geodf = county_shape
    else:
        geodf = pd.concat([geodf, county_shape], ignore_index=True)
geodf = geodf.set_index("name")

# dfc2: the county table indexed by "Date", with columns ordered by their
# value on `today`, largest first.
dfc2 = (
    pd.read_csv("dataset/county_data.csv")
    .set_index("Date")
    .sort_values(today, axis=1, ascending=False)
)

# dfc: the same file transposed. The first row after transposing (the
# values of the file's first column) becomes the column labels.
# NOTE(review): presumably that first column is "Date" -- confirm.
dfc = pd.read_csv("dataset/county_data.csv").transpose()
new_header = dfc.iloc[0]
dfc = dfc.iloc[1:]
dfc.columns = new_header

# Attach the per-date values to the shapes (joined on the "name" index) and
# store, for each shape, the first coordinate tuple of a representative
# point, used as the anchor of the map labels.
geodf = geodf.join(dfc)
geodf["text_coord"] = [shape.representative_point().coords[0] for shape in geodf["geometry"]]
# load county data
# The file is transposed so that its header entries become the index; the
# resulting columns are then labelled with the single name "Population".
popdf = pd.read_csv("dataset/county_population.csv").transpose()
popdf.columns = ["Population"]
# Notebook display of the resulting table.
popdf
# Map of the `today` column of geodf, with each shape labelled by its value.
fig, ax = plt.subplots(1, figsize=(15, 15))
ax = geodf.plot(column=today, cmap='Wistia', ax=ax)
for _, row in geodf.iterrows():
    # The label is passed positionally: the keyword `s` was renamed to
    # `text` in Matplotlib 3.3 and is rejected by later releases, while the
    # positional form works before and after the rename.
    ax.annotate(row[today], xy=row['text_coord'],
                horizontalalignment='center')
ax.set_title("Cummulative registered cases")
plt.axis("off")
plt.show()
# Map of the `today` column scaled to cases per 1000 inhabitants; the
# division aligns geodf and popdf on their shared index labels.
geodf["per1000"] = geodf[today]/popdf["Population"]*1000.
fig, ax = plt.subplots(1, figsize=(15, 15))
ax = geodf.plot(column="per1000", cmap='Wistia', ax=ax)
for _, row in geodf.iterrows():
    # Reuse the column computed above instead of recomputing it per row.
    # The label is passed positionally: the keyword `s` was renamed to
    # `text` in Matplotlib 3.3 and is rejected by later releases.
    ax.annotate("%.2f" % row["per1000"], xy=row['text_coord'],
                horizontalalignment='center')
ax.set_title("Cummulative registered cases per 1000 person")
plt.axis("off")
plt.show()
# Map of the change between the `yesterday` and `today` columns of geodf.
fig, ax = plt.subplots(1, figsize=(15, 15))
geodf["difference"] = geodf[today] - geodf[yesterday]
ax = geodf.plot(column="difference", cmap='Wistia', ax=ax)
for _, row in geodf.iterrows():
    # The label is passed positionally: the keyword `s` was renamed to
    # `text` in Matplotlib 3.3 and is rejected by later releases, while the
    # positional form works before and after the rename.
    ax.annotate(row["difference"], xy=row['text_coord'],
                horizontalalignment='center')
ax.set_title("Registered new cases")
plt.axis("off")
plt.show()
# Map of the "difference" column scaled to cases per 100000 inhabitants;
# the division aligns geodf and popdf on their shared index labels.
fig, ax = plt.subplots(1, figsize=(15, 15))
geodf["differenceper100000"] = geodf["difference"]/popdf["Population"]*100000.
ax = geodf.plot(column="differenceper100000", cmap='Wistia', ax=ax)
for _, row in geodf.iterrows():
    # The label is passed positionally: the keyword `s` was renamed to
    # `text` in Matplotlib 3.3 and is rejected by later releases, while the
    # positional form works before and after the rename.
    ax.annotate("%.2f" % row["differenceper100000"], xy=row['text_coord'],
                horizontalalignment='center')
ax.set_title("Registered new cases per 100000 person")
plt.axis("off")
plt.show()
# One line per column of dfc2, plotted over the dfc2 index. Each column gets
# its own marker symbol, numbered from 1 in column order.
fig = go.Figure()
for symbol_number, county in enumerate(dfc2.columns, start=1):
    county_trace = go.Scatter(
        x=dfc2.index,
        y=dfc2[county],
        mode='lines+markers',
        marker={"symbol": symbol_number, "size": 10},
        name=county,
    )
    fig.add_trace(county_trace)
fig.update_layout(
    title="Registered cases in the different counties",
    xaxis_title="Date",
    yaxis_title="Number of cases",
    font={"family": "Linux Biolinum", "size": 18},
)
fig.show()